Libraries

# if (!require("renv")) install.packages("renv")
# library(renv)
# renv::restore()
library(here)
here() starts at C:/Users/Marcony1/OneDrive - Fundacion Universidad de las Americas Puebla/Documents/MDS/Block 6/DSCI 532/DSCI_532_individual-assignment_marcony1
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(arrow)
Warning: package ‘arrow’ was built under R version 4.3.3
Attaching package: ‘arrow’

The following object is masked from ‘package:utils’:

    timestamp

Read data

# Raw zip archive shipped with the project; fail early if it is missing.
zip_file <- here("data", "raw", "iter_00_cpv2020_csv.zip")
stopifnot(file.exists(zip_file))

# Extract into a fresh, uniquely named directory under the session temp dir
# instead of a project-level "temp" folder: the script later removes
# `temp_dir` recursively, which must never hit a pre-existing project folder.
temp_dir <- tempfile(pattern = "iter_00_cpv2020_")
dir.create(temp_dir, showWarnings = FALSE)

# Paths of the two members to extract, relative to the archive root.
# `file.path()` always joins with "/", which is what zip member names use.
data_member <- file.path("iter_00_cpv2020",
                         "conjunto_de_datos",
                         "conjunto_de_datos_iter_00CSV20.csv")
dict_member <- file.path("iter_00_cpv2020",
                         "diccionario_datos",
                         "diccionario_datos_iter_00CSV20.csv")

unzip(zip_file, files = c(data_member, dict_member), exdir = temp_dir)

# Absolute locations of the extracted files.
data_path <- file.path(temp_dir, data_member)
dict_path <- file.path(temp_dir, dict_member)

# Read the data dictionary as-is; its header rows are cleaned up later.
info_dict <- read_csv(dict_path)
New names:Rows: 290 Columns: 10── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): ...1, ...2, ...3, ...4, ...5, ...6
lgl (4): ...7, ...8, ...9, ...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the extracted data table; column types are guessed by readr.
df <- read_csv(data_path)
Rows: 195662 Columns: 286── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (283): ENTIDAD, NOM_ENT, MUN, NOM_MUN, LOC, NOM_LOC, LONGITUD, LATITUD, ALTITUD, POBFEM, POBMAS, P_0A2, P_0A2_F, P_0A2_M, P_3YMAS, P_3YMAS_F, P_3Y...
dbl   (3): POBTOT, VIVTOT, TVIVHAB
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The extracted files are no longer needed once both tables are in memory.
unlink(temp_dir, recursive = TRUE)

# Save a copy of the data dictionary alongside the raw data.
info_dict |>
  write_csv(here("data", "raw", "diccionario_datos_iter_00CSV20.csv"))

Exploration

# Peek at the first rows of both tables, then list the structure of `df`.
df |> head()
info_dict |> head()
df |> str()
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ENTIDAD    : chr [1:195662] "00" "00" "00" "01" ...
 $ NOM_ENT    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
 $ MUN        : chr [1:195662] "000" "000" "000" "000" ...
 $ NOM_MUN    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
 $ LOC        : chr [1:195662] "0000" "9998" "9999" "0000" ...
 $ NOM_LOC    : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
 $ LONGITUD   : chr [1:195662] NA NA NA NA ...
 $ LATITUD    : chr [1:195662] NA NA NA NA ...
 $ ALTITUD    : chr [1:195662] NA NA NA NA ...
 $ POBTOT     : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
 $ POBFEM     : chr [1:195662] "64540634" "96869" "61324" "728924" ...
 $ POBMAS     : chr [1:195662] "61473390" "153485" "85801" "696683" ...
 $ P_0A2      : chr [1:195662] "5764054" "10493" "6798" "71864" ...
 $ P_0A2_F    : chr [1:195662] "2848875" "5193" "3407" "35604" ...
 $ P_0A2_M    : chr [1:195662] "2915179" "5300" "3391" "36260" ...
 $ P_3YMAS    : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
 $ P_3YMAS_F  : chr [1:195662] "61554567" "91463" "57628" "692561" ...
 $ P_3YMAS_M  : chr [1:195662] "58422017" "147978" "82129" "659674" ...
 $ P_5YMAS    : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
 $ P_5YMAS_F  : chr [1:195662] "59433559" "87931" "55256" "666713" ...
 $ P_5YMAS_M  : chr [1:195662] "56259714" "144155" "79772" "632956" ...
 $ P_12YMAS   : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
 $ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
 $ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
 $ P_15YMAS   : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
 $ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
 $ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
 $ P_18YMAS   : chr [1:195662] "87492680" "186968" "104612" "960764" ...
 $ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
 $ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
 $ P_3A5      : chr [1:195662] "6462212" "10900" "7028" "78833" ...
 $ P_3A5_F    : chr [1:195662] "3193548" "5270" "3511" "38679" ...
 $ P_3A5_M    : chr [1:195662] "3268664" "5630" "3517" "40154" ...
 $ P_6A11     : chr [1:195662] "12986217" "20793" "13506" "156683" ...
 $ P_6A11_F   : chr [1:195662] "6398755" "10082" "6574" "77289" ...
 $ P_6A11_M   : chr [1:195662] "6587462" "10711" "6932" "79394" ...
 $ P_8A14     : chr [1:195662] "15287375" "24342" "16724" "181905" ...
 $ P_8A14_F   : chr [1:195662] "7531118" "11538" "7679" "89383" ...
 $ P_8A14_M   : chr [1:195662] "7756257" "12804" "9045" "92522" ...
 $ P_12A14    : chr [1:195662] "6542801" "10337" "7693" "77815" ...
 $ P_12A14_F  : chr [1:195662] "3229273" "4767" "3268" "38206" ...
 $ P_12A14_M  : chr [1:195662] "3313528" "5570" "4425" "39609" ...
 $ P_15A17    : chr [1:195662] "6492674" "10443" "6918" "78140" ...
 $ P_15A17_F  : chr [1:195662] "3202134" "4830" "3091" "38298" ...
 $ P_15A17_M  : chr [1:195662] "3290540" "5613" "3827" "39842" ...
 $ P_18A24    : chr [1:195662] "14736111" "27841" "16336" "180847" ...
 $ P_18A24_F  : chr [1:195662] "7398617" "11140" "6760" "90632" ...
 $ P_18A24_M  : chr [1:195662] "7337494" "16701" "9576" "90215" ...
 $ P_15A49_F  : chr [1:195662] "33885546" "47693" "29297" "388917" ...
 $ P_60YMAS   : chr [1:195662] "15142976" "37383" "21277" "145376" ...
 $ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
 $ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
 $ REL_H_M    : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
 $ POB0_14    : chr [1:195662] "31755284" "52523" "35025" "385195" ...
 $ POB15_64   : chr [1:195662] "83663440" "171209" "96250" "941834" ...
 $ POB65_MAS  : chr [1:195662] "10321914" "26202" "15280" "97070" ...
 $ P_0A4      : chr [1:195662] "10047365" "17848" "11527" "124430" ...
 $ P_0A4_F    : chr [1:195662] "4969883" "8725" "5779" "61452" ...
 $ P_0A4_M    : chr [1:195662] "5077482" "9123" "5748" "62978" ...
 $ P_5A9      : chr [1:195662] "10764379" "17380" "11274" "131048" ...
 $ P_5A9_F    : chr [1:195662] "5311288" "8526" "5558" "64689" ...
 $ P_5A9_M    : chr [1:195662] "5453091" "8854" "5716" "66359" ...
 $ P_10A14    : chr [1:195662] "10943540" "17295" "12224" "129717" ...
 $ P_10A14_F  : chr [1:195662] "5389280" "8061" "5423" "63637" ...
 $ P_10A14_M  : chr [1:195662] "5554260" "9234" "6801" "66080" ...
 $ P_15A19    : chr [1:195662] "10806690" "18303" "11484" "131967" ...
 $ P_15A19_F  : chr [1:195662] "5344540" "8138" "5140" "65064" ...
 $ P_15A19_M  : chr [1:195662] "5462150" "10165" "6344" "66903" ...
 $ P_20A24    : chr [1:195662] "10422095" "19981" "11770" "127020" ...
 $ P_20A24_F  : chr [1:195662] "5256211" "7832" "4711" "63866" ...
 $ P_20A24_M  : chr [1:195662] "5165884" "12149" "7059" "63154" ...
 $ P_25A29    : chr [1:195662] "9993001" "20584" "12238" "118426" ...
 $ P_25A29_F  : chr [1:195662] "5131597" "7125" "4427" "60285" ...
 $ P_25A29_M  : chr [1:195662] "4861404" "13459" "7811" "58141" ...
 $ P_30A34    : chr [1:195662] "9420827" "19601" "11315" "106825" ...
 $ P_30A34_F  : chr [1:195662] "4893101" "6309" "4074" "55174" ...
 $ P_30A34_M  : chr [1:195662] "4527726" "13292" "7241" "51651" ...
 $ P_35A39    : chr [1:195662] "9020276" "18645" "10357" "99257" ...
 $ P_35A39_F  : chr [1:195662] "4688746" "6289" "3825" "51483" ...
 $ P_35A39_M  : chr [1:195662] "4331530" "12356" "6532" "47774" ...
 $ P_40A44    : chr [1:195662] "8503586" "17934" "9705" "92378" ...
 $ P_40A44_F  : chr [1:195662] "4441282" "6060" "3743" "48539" ...
 $ P_40A44_M  : chr [1:195662] "4062304" "11874" "5962" "43839" ...
 $ P_45A49    : chr [1:195662] "7942413" "16840" "8668" "84669" ...
 $ P_45A49_F  : chr [1:195662] "4130069" "5940" "3377" "44506" ...
 $ P_45A49_M  : chr [1:195662] "3812344" "10900" "5291" "40163" ...
 $ P_50A54    : chr [1:195662] "7037532" "15070" "7878" "74121" ...
 $ P_50A54_F  : chr [1:195662] "3705369" "5481" "3239" "39510" ...
 $ P_50A54_M  : chr [1:195662] "3332163" "9589" "4639" "34611" ...
 $ P_55A59    : chr [1:195662] "5695958" "13070" "6838" "58865" ...
 $ P_55A59_F  : chr [1:195662] "3002982" "4728" "2823" "31257" ...
 $ P_55A59_M  : chr [1:195662] "2692976" "8342" "4015" "27608" ...
 $ P_60A64    : chr [1:195662] "4821062" "11181" "5997" "48306" ...
 $ P_60A64_F  : chr [1:195662] "2563200" "4050" "2511" "25871" ...
 $ P_60A64_M  : chr [1:195662] "2257862" "7131" "3486" "22435" ...
 $ P_65A69    : chr [1:195662] "3645077" "9160" "5052" "35823" ...
 $ P_65A69_F  : chr [1:195662] "1938227" "3343" "2130" "19125" ...
 $ P_65A69_M  : chr [1:195662] "1706850" "5817" "2922" "16698" ...
 $ P_70A74    : chr [1:195662] "2647340" "6903" "3852" "25586" ...
  [list output truncated]
 - attr(*, "spec")=
  .. cols(
  ..   ENTIDAD = col_character(),
  ..   NOM_ENT = col_character(),
  ..   MUN = col_character(),
  ..   NOM_MUN = col_character(),
  ..   LOC = col_character(),
  ..   NOM_LOC = col_character(),
  ..   LONGITUD = col_character(),
  ..   LATITUD = col_character(),
  ..   ALTITUD = col_character(),
  ..   POBTOT = col_double(),
  ..   POBFEM = col_character(),
  ..   POBMAS = col_character(),
  ..   P_0A2 = col_character(),
  ..   P_0A2_F = col_character(),
  ..   P_0A2_M = col_character(),
  ..   P_3YMAS = col_character(),
  ..   P_3YMAS_F = col_character(),
  ..   P_3YMAS_M = col_character(),
  ..   P_5YMAS = col_character(),
  ..   P_5YMAS_F = col_character(),
  ..   P_5YMAS_M = col_character(),
  ..   P_12YMAS = col_character(),
  ..   P_12YMAS_F = col_character(),
  ..   P_12YMAS_M = col_character(),
  ..   P_15YMAS = col_character(),
  ..   P_15YMAS_F = col_character(),
  ..   P_15YMAS_M = col_character(),
  ..   P_18YMAS = col_character(),
  ..   P_18YMAS_F = col_character(),
  ..   P_18YMAS_M = col_character(),
  ..   P_3A5 = col_character(),
  ..   P_3A5_F = col_character(),
  ..   P_3A5_M = col_character(),
  ..   P_6A11 = col_character(),
  ..   P_6A11_F = col_character(),
  ..   P_6A11_M = col_character(),
  ..   P_8A14 = col_character(),
  ..   P_8A14_F = col_character(),
  ..   P_8A14_M = col_character(),
  ..   P_12A14 = col_character(),
  ..   P_12A14_F = col_character(),
  ..   P_12A14_M = col_character(),
  ..   P_15A17 = col_character(),
  ..   P_15A17_F = col_character(),
  ..   P_15A17_M = col_character(),
  ..   P_18A24 = col_character(),
  ..   P_18A24_F = col_character(),
  ..   P_18A24_M = col_character(),
  ..   P_15A49_F = col_character(),
  ..   P_60YMAS = col_character(),
  ..   P_60YMAS_F = col_character(),
  ..   P_60YMAS_M = col_character(),
  ..   REL_H_M = col_character(),
  ..   POB0_14 = col_character(),
  ..   POB15_64 = col_character(),
  ..   POB65_MAS = col_character(),
  ..   P_0A4 = col_character(),
  ..   P_0A4_F = col_character(),
  ..   P_0A4_M = col_character(),
  ..   P_5A9 = col_character(),
  ..   P_5A9_F = col_character(),
  ..   P_5A9_M = col_character(),
  ..   P_10A14 = col_character(),
  ..   P_10A14_F = col_character(),
  ..   P_10A14_M = col_character(),
  ..   P_15A19 = col_character(),
  ..   P_15A19_F = col_character(),
  ..   P_15A19_M = col_character(),
  ..   P_20A24 = col_character(),
  ..   P_20A24_F = col_character(),
  ..   P_20A24_M = col_character(),
  ..   P_25A29 = col_character(),
  ..   P_25A29_F = col_character(),
  ..   P_25A29_M = col_character(),
  ..   P_30A34 = col_character(),
  ..   P_30A34_F = col_character(),
  ..   P_30A34_M = col_character(),
  ..   P_35A39 = col_character(),
  ..   P_35A39_F = col_character(),
  ..   P_35A39_M = col_character(),
  ..   P_40A44 = col_character(),
  ..   P_40A44_F = col_character(),
  ..   P_40A44_M = col_character(),
  ..   P_45A49 = col_character(),
  ..   P_45A49_F = col_character(),
  ..   P_45A49_M = col_character(),
  ..   P_50A54 = col_character(),
  ..   P_50A54_F = col_character(),
  ..   P_50A54_M = col_character(),
  ..   P_55A59 = col_character(),
  ..   P_55A59_F = col_character(),
  ..   P_55A59_M = col_character(),
  ..   P_60A64 = col_character(),
  ..   P_60A64_F = col_character(),
  ..   P_60A64_M = col_character(),
  ..   P_65A69 = col_character(),
  ..   P_65A69_F = col_character(),
  ..   P_65A69_M = col_character(),
  ..   P_70A74 = col_character(),
  ..   P_70A74_F = col_character(),
  ..   P_70A74_M = col_character(),
  ..   P_75A79 = col_character(),
  ..   P_75A79_F = col_character(),
  ..   P_75A79_M = col_character(),
  ..   P_80A84 = col_character(),
  ..   P_80A84_F = col_character(),
  ..   P_80A84_M = col_character(),
  ..   P_85YMAS = col_character(),
  ..   P_85YMAS_F = col_character(),
  ..   P_85YMAS_M = col_character(),
  ..   PROM_HNV = col_character(),
  ..   PNACENT = col_character(),
  ..   PNACENT_F = col_character(),
  ..   PNACENT_M = col_character(),
  ..   PNACOE = col_character(),
  ..   PNACOE_F = col_character(),
  ..   PNACOE_M = col_character(),
  ..   PRES2015 = col_character(),
  ..   PRES2015_F = col_character(),
  ..   PRES2015_M = col_character(),
  ..   PRESOE15 = col_character(),
  ..   PRESOE15_F = col_character(),
  ..   PRESOE15_M = col_character(),
  ..   P3YM_HLI = col_character(),
  ..   P3YM_HLI_F = col_character(),
  ..   P3YM_HLI_M = col_character(),
  ..   P3HLINHE = col_character(),
  ..   P3HLINHE_F = col_character(),
  ..   P3HLINHE_M = col_character(),
  ..   P3HLI_HE = col_character(),
  ..   P3HLI_HE_F = col_character(),
  ..   P3HLI_HE_M = col_character(),
  ..   P5_HLI = col_character(),
  ..   P5_HLI_NHE = col_character(),
  ..   P5_HLI_HE = col_character(),
  ..   PHOG_IND = col_character(),
  ..   POB_AFRO = col_character(),
  ..   POB_AFRO_F = col_character(),
  ..   POB_AFRO_M = col_character(),
  ..   PCON_DISC = col_character(),
  ..   PCDISC_MOT = col_character(),
  ..   PCDISC_VIS = col_character(),
  ..   PCDISC_LENG = col_character(),
  ..   PCDISC_AUD = col_character(),
  ..   PCDISC_MOT2 = col_character(),
  ..   PCDISC_MEN = col_character(),
  ..   PCON_LIMI = col_character(),
  ..   PCLIM_CSB = col_character(),
  ..   PCLIM_VIS = col_character(),
  ..   PCLIM_HACO = col_character(),
  ..   PCLIM_OAUD = col_character(),
  ..   PCLIM_MOT2 = col_character(),
  ..   PCLIM_RE_CO = col_character(),
  ..   PCLIM_PMEN = col_character(),
  ..   PSIND_LIM = col_character(),
  ..   P3A5_NOA = col_character(),
  ..   P3A5_NOA_F = col_character(),
  ..   P3A5_NOA_M = col_character(),
  ..   P6A11_NOA = col_character(),
  ..   P6A11_NOAF = col_character(),
  ..   P6A11_NOAM = col_character(),
  ..   P12A14NOA = col_character(),
  ..   P12A14NOAF = col_character(),
  ..   P12A14NOAM = col_character(),
  ..   P15A17A = col_character(),
  ..   P15A17A_F = col_character(),
  ..   P15A17A_M = col_character(),
  ..   P18A24A = col_character(),
  ..   P18A24A_F = col_character(),
  ..   P18A24A_M = col_character(),
  ..   P8A14AN = col_character(),
  ..   P8A14AN_F = col_character(),
  ..   P8A14AN_M = col_character(),
  ..   P15YM_AN = col_character(),
  ..   P15YM_AN_F = col_character(),
  ..   P15YM_AN_M = col_character(),
  ..   P15YM_SE = col_character(),
  ..   P15YM_SE_F = col_character(),
  ..   P15YM_SE_M = col_character(),
  ..   P15PRI_IN = col_character(),
  ..   P15PRI_INF = col_character(),
  ..   P15PRI_INM = col_character(),
  ..   P15PRI_CO = col_character(),
  ..   P15PRI_COF = col_character(),
  ..   P15PRI_COM = col_character(),
  ..   P15SEC_IN = col_character(),
  ..   P15SEC_INF = col_character(),
  ..   P15SEC_INM = col_character(),
  ..   P15SEC_CO = col_character(),
  ..   P15SEC_COF = col_character(),
  ..   P15SEC_COM = col_character(),
  ..   P18YM_PB = col_character(),
  ..   P18YM_PB_F = col_character(),
  ..   P18YM_PB_M = col_character(),
  ..   GRAPROES = col_character(),
  ..   GRAPROES_F = col_character(),
  ..   GRAPROES_M = col_character(),
  ..   PEA = col_character(),
  ..   PEA_F = col_character(),
  ..   PEA_M = col_character(),
  ..   PE_INAC = col_character(),
  ..   PE_INAC_F = col_character(),
  ..   PE_INAC_M = col_character(),
  ..   POCUPADA = col_character(),
  ..   POCUPADA_F = col_character(),
  ..   POCUPADA_M = col_character(),
  ..   PDESOCUP = col_character(),
  ..   PDESOCUP_F = col_character(),
  ..   PDESOCUP_M = col_character(),
  ..   PSINDER = col_character(),
  ..   PDER_SS = col_character(),
  ..   PDER_IMSS = col_character(),
  ..   PDER_ISTE = col_character(),
  ..   PDER_ISTEE = col_character(),
  ..   PAFIL_PDOM = col_character(),
  ..   PDER_SEGP = col_character(),
  ..   PDER_IMSSB = col_character(),
  ..   PAFIL_IPRIV = col_character(),
  ..   PAFIL_OTRAI = col_character(),
  ..   P12YM_SOLT = col_character(),
  ..   P12YM_CASA = col_character(),
  ..   P12YM_SEPA = col_character(),
  ..   PCATOLICA = col_character(),
  ..   PRO_CRIEVA = col_character(),
  ..   POTRAS_REL = col_character(),
  ..   PSIN_RELIG = col_character(),
  ..   TOTHOG = col_character(),
  ..   HOGJEF_F = col_character(),
  ..   HOGJEF_M = col_character(),
  ..   POBHOG = col_character(),
  ..   PHOGJEF_F = col_character(),
  ..   PHOGJEF_M = col_character(),
  ..   VIVTOT = col_double(),
  ..   TVIVHAB = col_double(),
  ..   TVIVPAR = col_character(),
  ..   VIVPAR_HAB = col_character(),
  ..   VIVPARH_CV = col_character(),
  ..   TVIVPARHAB = col_character(),
  ..   VIVPAR_DES = col_character(),
  ..   VIVPAR_UT = col_character(),
  ..   OCUPVIVPAR = col_character(),
  ..   PROM_OCUP = col_character(),
  ..   PRO_OCUP_C = col_character(),
  ..   VPH_PISODT = col_character(),
  ..   VPH_PISOTI = col_character(),
  ..   VPH_1DOR = col_character(),
  ..   VPH_2YMASD = col_character(),
  ..   VPH_1CUART = col_character(),
  ..   VPH_2CUART = col_character(),
  ..   VPH_3YMASC = col_character(),
  ..   VPH_C_ELEC = col_character(),
  ..   VPH_S_ELEC = col_character(),
  ..   VPH_AGUADV = col_character(),
  ..   VPH_AEASP = col_character(),
  ..   VPH_AGUAFV = col_character(),
  ..   VPH_TINACO = col_character(),
  ..   VPH_CISTER = col_character(),
  ..   VPH_EXCSA = col_character(),
  ..   VPH_LETR = col_character(),
  ..   VPH_DRENAJ = col_character(),
  ..   VPH_NODREN = col_character(),
  ..   VPH_C_SERV = col_character(),
  ..   VPH_NDEAED = col_character(),
  ..   VPH_DSADMA = col_character(),
  ..   VPH_NDACMM = col_character(),
  ..   VPH_SNBIEN = col_character(),
  ..   VPH_REFRI = col_character(),
  ..   VPH_LAVAD = col_character(),
  ..   VPH_HMICRO = col_character(),
  ..   VPH_AUTOM = col_character(),
  ..   VPH_MOTO = col_character(),
  ..   VPH_BICI = col_character(),
  ..   VPH_RADIO = col_character(),
  ..   VPH_TV = col_character(),
  ..   VPH_PC = col_character(),
  ..   VPH_TELEF = col_character(),
  ..   VPH_CEL = col_character(),
  ..   VPH_INTER = col_character(),
  ..   VPH_STVP = col_character(),
  ..   VPH_SPMVPI = col_character(),
  ..   VPH_CVJ = col_character(),
  ..   VPH_SINRTV = col_character(),
  ..   VPH_SINLTC = col_character(),
  ..   VPH_SINCINT = col_character(),
  ..   VPH_SINTIC = col_character(),
  ..   TAMLOC = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# Drop the first three rows; the row that follows is used as the header.
clean_info_dict <- info_dict[-c(1:3), ]

# Promote that row to column names. tibble's `names<-` expects a character
# vector without missing entries, so flatten the one-row tibble and give the
# blank header cells positional placeholder names.
header_names <- as.character(unlist(clean_info_dict[1, ], use.names = FALSE))
blank_names <- is.na(header_names)
header_names[blank_names] <- paste0("...", which(blank_names))
names(clean_info_dict) <- header_names
Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.Warning: The `value` argument of `names<-` must be a character vector as of tibble 3.0.0.
# Remove the row that was promoted to column names together with
# columns 7 through 10, in a single subsetting step.
clean_info_dict <- clean_info_dict[-1, -(7:10)]

# Show the cleaned dictionary.
clean_info_dict

Replacing names for consistency

# Lookup table: entity name as found in NOM_ENT -> name to use instead.
replacement_dict <- c(
  "Coahuila de Zaragoza"            = "Coahuila",
  "Michoacán de Ocampo"             = "Michoacán",
  "Veracruz de Ignacio de la Llave" = "Veracruz",
  "México"                          = "Estado de México"
)

# Look every NOM_ENT value up in the table; values without an entry come back
# as NA from the lookup and fall through to the original name.
df_new_names <- mutate(
  df,
  NOM_ENT = coalesce(unname(replacement_dict[NOM_ENT]), NOM_ENT)
)

# One row per distinct entity name, saved for later use.
unique_states <- distinct(df_new_names, NOM_ENT)
unique_states |>
  write_csv(here("data", "processed", "entity_names.csv"))

unique_states
# Read the saved entity names back and extract the column as a plain vector
# (`pull()` with no argument takes the last column).
entities_csv <- here("data", "processed", "entity_names.csv") |>
  read_csv() |>
  pull()
Rows: 33 Columns: 1── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): NOM_ENT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the entity names that were read back from the CSV file.
print(entities_csv)
 [1] "Total nacional"      "Aguascalientes"      "Baja California"     "Baja California Sur" "Campeche"            "Coahuila"           
 [7] "Colima"              "Chiapas"             "Chihuahua"           "Ciudad de México"    "Durango"             "Guanajuato"         
[13] "Guerrero"            "Hidalgo"             "Jalisco"             "Estado de México"    "Michoacán"           "Morelos"            
[19] "Nayarit"             "Nuevo León"          "Oaxaca"              "Puebla"              "Querétaro"           "Quintana Roo"       
[25] "San Luis Potosí"     "Sinaloa"             "Sonora"              "Tabasco"             "Tamaulipas"          "Tlaxcala"           
[31] "Veracruz"            "Yucatán"             "Zacatecas"          

Selecting the variables that we’ll analyze (chosen by their row in the data dictionary)

# Positions (rows of the cleaned dictionary) of the variables to keep.
rows_to_include <- c(2, 4, 6:8, 10, 53:117)

# Take those dictionary rows and extract the fourth column as a vector;
# it is used afterwards as the set of column names to select.
filtered_data <- clean_info_dict |>
  slice(rows_to_include) |>
  pull(4)

filtered_data
 [1] "NOM_ENT"    "NOM_MUN"    "NOM_LOC"    "LONGITUD"   "LATITUD"    "POBTOT"     "REL_H_M"    "POB0_14"    "POB15_64"   "POB65_MAS"  "P_0A4"     
[12] "P_0A4_F"    "P_0A4_M"    "P_5A9"      "P_5A9_F"    "P_5A9_M"    "P_10A14"    "P_10A14_F"  "P_10A14_M"  "P_15A19"    "P_15A19_F"  "P_15A19_M" 
[23] "P_20A24"    "P_20A24_F"  "P_20A24_M"  "P_25A29"    "P_25A29_F"  "P_25A29_M"  "P_30A34"    "P_30A34_F"  "P_30A34_M"  "P_35A39"    "P_35A39_F" 
[34] "P_35A39_M"  "P_40A44"    "P_40A44_F"  "P_40A44_M"  "P_45A49"    "P_45A49_F"  "P_45A49_M"  "P_50A54"    "P_50A54_F"  "P_50A54_M"  "P_55A59"   
[45] "P_55A59_F"  "P_55A59_M"  "P_60A64"    "P_60A64_F"  "P_60A64_M"  "P_65A69"    "P_65A69_F"  "P_65A69_M"  "P_70A74"    "P_70A74_F"  "P_70A74_M" 
[56] "P_75A79"    "P_75A79_F"  "P_75A79_M"  "P_80A84"    "P_80A84_F"  "P_80A84_M"  "P_85YMAS"   "P_85YMAS_F" "P_85YMAS_M" "PROM_HNV"   "PNACENT"   
[67] "PNACENT_F"  "PNACENT_M"  "PNACOE"     "PNACOE_F"   "PNACOE_M"  
# Keep only the columns named in `filtered_data`. `all_of()` marks the
# character vector as an external selection (and errors if a name is
# missing), replacing the bare-vector form deprecated in tidyselect 1.1.0.
selected_df <- df_new_names |>
  select(all_of(filtered_data))
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(filtered_data)

# Now:
data %>% select(all_of(filtered_data))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Display the table restricted to the selected columns.
selected_df

EDA before exporting

# Show the structure (column names and types) of the full table.
# NOTE(review): this inspects `df`, not `selected_df` — confirm which table
# is meant to be checked before exporting.
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ENTIDAD    : chr [1:195662] "00" "00" "00" "01" ...
 $ NOM_ENT    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
 $ MUN        : chr [1:195662] "000" "000" "000" "000" ...
 $ NOM_MUN    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
 $ LOC        : chr [1:195662] "0000" "9998" "9999" "0000" ...
 $ NOM_LOC    : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
 $ LONGITUD   : chr [1:195662] NA NA NA NA ...
 $ LATITUD    : chr [1:195662] NA NA NA NA ...
 $ ALTITUD    : chr [1:195662] NA NA NA NA ...
 $ POBTOT     : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
 $ POBFEM     : chr [1:195662] "64540634" "96869" "61324" "728924" ...
 $ POBMAS     : chr [1:195662] "61473390" "153485" "85801" "696683" ...
 $ P_0A2      : chr [1:195662] "5764054" "10493" "6798" "71864" ...
 $ P_0A2_F    : chr [1:195662] "2848875" "5193" "3407" "35604" ...
 $ P_0A2_M    : chr [1:195662] "2915179" "5300" "3391" "36260" ...
 $ P_3YMAS    : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
 $ P_3YMAS_F  : chr [1:195662] "61554567" "91463" "57628" "692561" ...
 $ P_3YMAS_M  : chr [1:195662] "58422017" "147978" "82129" "659674" ...
 $ P_5YMAS    : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
 $ P_5YMAS_F  : chr [1:195662] "59433559" "87931" "55256" "666713" ...
 $ P_5YMAS_M  : chr [1:195662] "56259714" "144155" "79772" "632956" ...
 $ P_12YMAS   : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
 $ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
 $ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
 $ P_15YMAS   : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
 $ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
 $ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
 $ P_18YMAS   : chr [1:195662] "87492680" "186968" "104612" "960764" ...
 $ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
 $ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
 $ P_3A5      : chr [1:195662] "6462212" "10900" "7028" "78833" ...
 $ P_3A5_F    : chr [1:195662] "3193548" "5270" "3511" "38679" ...
 $ P_3A5_M    : chr [1:195662] "3268664" "5630" "3517" "40154" ...
 $ P_6A11     : chr [1:195662] "12986217" "20793" "13506" "156683" ...
 $ P_6A11_F   : chr [1:195662] "6398755" "10082" "6574" "77289" ...
 $ P_6A11_M   : chr [1:195662] "6587462" "10711" "6932" "79394" ...
 $ P_8A14     : chr [1:195662] "15287375" "24342" "16724" "181905" ...
 $ P_8A14_F   : chr [1:195662] "7531118" "11538" "7679" "89383" ...
 $ P_8A14_M   : chr [1:195662] "7756257" "12804" "9045" "92522" ...
 $ P_12A14    : chr [1:195662] "6542801" "10337" "7693" "77815" ...
 $ P_12A14_F  : chr [1:195662] "3229273" "4767" "3268" "38206" ...
 $ P_12A14_M  : chr [1:195662] "3313528" "5570" "4425" "39609" ...
 $ P_15A17    : chr [1:195662] "6492674" "10443" "6918" "78140" ...
 $ P_15A17_F  : chr [1:195662] "3202134" "4830" "3091" "38298" ...
 $ P_15A17_M  : chr [1:195662] "3290540" "5613" "3827" "39842" ...
 $ P_18A24    : chr [1:195662] "14736111" "27841" "16336" "180847" ...
 $ P_18A24_F  : chr [1:195662] "7398617" "11140" "6760" "90632" ...
 $ P_18A24_M  : chr [1:195662] "7337494" "16701" "9576" "90215" ...
 $ P_15A49_F  : chr [1:195662] "33885546" "47693" "29297" "388917" ...
 $ P_60YMAS   : chr [1:195662] "15142976" "37383" "21277" "145376" ...
 $ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
 $ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
 $ REL_H_M    : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
 $ POB0_14    : chr [1:195662] "31755284" "52523" "35025" "385195" ...
 $ POB15_64   : chr [1:195662] "83663440" "171209" "96250" "941834" ...
 $ POB65_MAS  : chr [1:195662] "10321914" "26202" "15280" "97070" ...
 $ P_0A4      : chr [1:195662] "10047365" "17848" "11527" "124430" ...
 $ P_0A4_F    : chr [1:195662] "4969883" "8725" "5779" "61452" ...
 $ P_0A4_M    : chr [1:195662] "5077482" "9123" "5748" "62978" ...
 $ P_5A9      : chr [1:195662] "10764379" "17380" "11274" "131048" ...
 $ P_5A9_F    : chr [1:195662] "5311288" "8526" "5558" "64689" ...
 $ P_5A9_M    : chr [1:195662] "5453091" "8854" "5716" "66359" ...
 $ P_10A14    : chr [1:195662] "10943540" "17295" "12224" "129717" ...
 $ P_10A14_F  : chr [1:195662] "5389280" "8061" "5423" "63637" ...
 $ P_10A14_M  : chr [1:195662] "5554260" "9234" "6801" "66080" ...
 $ P_15A19    : chr [1:195662] "10806690" "18303" "11484" "131967" ...
 $ P_15A19_F  : chr [1:195662] "5344540" "8138" "5140" "65064" ...
 $ P_15A19_M  : chr [1:195662] "5462150" "10165" "6344" "66903" ...
 $ P_20A24    : chr [1:195662] "10422095" "19981" "11770" "127020" ...
 $ P_20A24_F  : chr [1:195662] "5256211" "7832" "4711" "63866" ...
 $ P_20A24_M  : chr [1:195662] "5165884" "12149" "7059" "63154" ...
 $ P_25A29    : chr [1:195662] "9993001" "20584" "12238" "118426" ...
 $ P_25A29_F  : chr [1:195662] "5131597" "7125" "4427" "60285" ...
 $ P_25A29_M  : chr [1:195662] "4861404" "13459" "7811" "58141" ...
 $ P_30A34    : chr [1:195662] "9420827" "19601" "11315" "106825" ...
 $ P_30A34_F  : chr [1:195662] "4893101" "6309" "4074" "55174" ...
 $ P_30A34_M  : chr [1:195662] "4527726" "13292" "7241" "51651" ...
 $ P_35A39    : chr [1:195662] "9020276" "18645" "10357" "99257" ...
 $ P_35A39_F  : chr [1:195662] "4688746" "6289" "3825" "51483" ...
 $ P_35A39_M  : chr [1:195662] "4331530" "12356" "6532" "47774" ...
 $ P_40A44    : chr [1:195662] "8503586" "17934" "9705" "92378" ...
 $ P_40A44_F  : chr [1:195662] "4441282" "6060" "3743" "48539" ...
 $ P_40A44_M  : chr [1:195662] "4062304" "11874" "5962" "43839" ...
 $ P_45A49    : chr [1:195662] "7942413" "16840" "8668" "84669" ...
 $ P_45A49_F  : chr [1:195662] "4130069" "5940" "3377" "44506" ...
 $ P_45A49_M  : chr [1:195662] "3812344" "10900" "5291" "40163" ...
 $ P_50A54    : chr [1:195662] "7037532" "15070" "7878" "74121" ...
 $ P_50A54_F  : chr [1:195662] "3705369" "5481" "3239" "39510" ...
 $ P_50A54_M  : chr [1:195662] "3332163" "9589" "4639" "34611" ...
 $ P_55A59    : chr [1:195662] "5695958" "13070" "6838" "58865" ...
 $ P_55A59_F  : chr [1:195662] "3002982" "4728" "2823" "31257" ...
 $ P_55A59_M  : chr [1:195662] "2692976" "8342" "4015" "27608" ...
 $ P_60A64    : chr [1:195662] "4821062" "11181" "5997" "48306" ...
 $ P_60A64_F  : chr [1:195662] "2563200" "4050" "2511" "25871" ...
 $ P_60A64_M  : chr [1:195662] "2257862" "7131" "3486" "22435" ...
 $ P_65A69    : chr [1:195662] "3645077" "9160" "5052" "35823" ...
 $ P_65A69_F  : chr [1:195662] "1938227" "3343" "2130" "19125" ...
 $ P_65A69_M  : chr [1:195662] "1706850" "5817" "2922" "16698" ...
 $ P_70A74    : chr [1:195662] "2647340" "6903" "3852" "25586" ...
  [list output truncated]
 - attr(*, "spec")=
  .. cols(
  ..   ENTIDAD = col_character(),
  ..   NOM_ENT = col_character(),
  ..   MUN = col_character(),
  ..   NOM_MUN = col_character(),
  ..   LOC = col_character(),
  ..   NOM_LOC = col_character(),
  ..   LONGITUD = col_character(),
  ..   LATITUD = col_character(),
  ..   ALTITUD = col_character(),
  ..   POBTOT = col_double(),
  ..   POBFEM = col_character(),
  ..   POBMAS = col_character(),
  ..   P_0A2 = col_character(),
  ..   P_0A2_F = col_character(),
  ..   P_0A2_M = col_character(),
  ..   P_3YMAS = col_character(),
  ..   P_3YMAS_F = col_character(),
  ..   P_3YMAS_M = col_character(),
  ..   P_5YMAS = col_character(),
  ..   P_5YMAS_F = col_character(),
  ..   P_5YMAS_M = col_character(),
  ..   P_12YMAS = col_character(),
  ..   P_12YMAS_F = col_character(),
  ..   P_12YMAS_M = col_character(),
  ..   P_15YMAS = col_character(),
  ..   P_15YMAS_F = col_character(),
  ..   P_15YMAS_M = col_character(),
  ..   P_18YMAS = col_character(),
  ..   P_18YMAS_F = col_character(),
  ..   P_18YMAS_M = col_character(),
  ..   P_3A5 = col_character(),
  ..   P_3A5_F = col_character(),
  ..   P_3A5_M = col_character(),
  ..   P_6A11 = col_character(),
  ..   P_6A11_F = col_character(),
  ..   P_6A11_M = col_character(),
  ..   P_8A14 = col_character(),
  ..   P_8A14_F = col_character(),
  ..   P_8A14_M = col_character(),
  ..   P_12A14 = col_character(),
  ..   P_12A14_F = col_character(),
  ..   P_12A14_M = col_character(),
  ..   P_15A17 = col_character(),
  ..   P_15A17_F = col_character(),
  ..   P_15A17_M = col_character(),
  ..   P_18A24 = col_character(),
  ..   P_18A24_F = col_character(),
  ..   P_18A24_M = col_character(),
  ..   P_15A49_F = col_character(),
  ..   P_60YMAS = col_character(),
  ..   P_60YMAS_F = col_character(),
  ..   P_60YMAS_M = col_character(),
  ..   REL_H_M = col_character(),
  ..   POB0_14 = col_character(),
  ..   POB15_64 = col_character(),
  ..   POB65_MAS = col_character(),
  ..   P_0A4 = col_character(),
  ..   P_0A4_F = col_character(),
  ..   P_0A4_M = col_character(),
  ..   P_5A9 = col_character(),
  ..   P_5A9_F = col_character(),
  ..   P_5A9_M = col_character(),
  ..   P_10A14 = col_character(),
  ..   P_10A14_F = col_character(),
  ..   P_10A14_M = col_character(),
  ..   P_15A19 = col_character(),
  ..   P_15A19_F = col_character(),
  ..   P_15A19_M = col_character(),
  ..   P_20A24 = col_character(),
  ..   P_20A24_F = col_character(),
  ..   P_20A24_M = col_character(),
  ..   P_25A29 = col_character(),
  ..   P_25A29_F = col_character(),
  ..   P_25A29_M = col_character(),
  ..   P_30A34 = col_character(),
  ..   P_30A34_F = col_character(),
  ..   P_30A34_M = col_character(),
  ..   P_35A39 = col_character(),
  ..   P_35A39_F = col_character(),
  ..   P_35A39_M = col_character(),
  ..   P_40A44 = col_character(),
  ..   P_40A44_F = col_character(),
  ..   P_40A44_M = col_character(),
  ..   P_45A49 = col_character(),
  ..   P_45A49_F = col_character(),
  ..   P_45A49_M = col_character(),
  ..   P_50A54 = col_character(),
  ..   P_50A54_F = col_character(),
  ..   P_50A54_M = col_character(),
  ..   P_55A59 = col_character(),
  ..   P_55A59_F = col_character(),
  ..   P_55A59_M = col_character(),
  ..   P_60A64 = col_character(),
  ..   P_60A64_F = col_character(),
  ..   P_60A64_M = col_character(),
  ..   P_65A69 = col_character(),
  ..   P_65A69_F = col_character(),
  ..   P_65A69_M = col_character(),
  ..   P_70A74 = col_character(),
  ..   P_70A74_F = col_character(),
  ..   P_70A74_M = col_character(),
  ..   P_75A79 = col_character(),
  ..   P_75A79_F = col_character(),
  ..   P_75A79_M = col_character(),
  ..   P_80A84 = col_character(),
  ..   P_80A84_F = col_character(),
  ..   P_80A84_M = col_character(),
  ..   P_85YMAS = col_character(),
  ..   P_85YMAS_F = col_character(),
  ..   P_85YMAS_M = col_character(),
  ..   PROM_HNV = col_character(),
  ..   PNACENT = col_character(),
  ..   PNACENT_F = col_character(),
  ..   PNACENT_M = col_character(),
  ..   PNACOE = col_character(),
  ..   PNACOE_F = col_character(),
  ..   PNACOE_M = col_character(),
  ..   PRES2015 = col_character(),
  ..   PRES2015_F = col_character(),
  ..   PRES2015_M = col_character(),
  ..   PRESOE15 = col_character(),
  ..   PRESOE15_F = col_character(),
  ..   PRESOE15_M = col_character(),
  ..   P3YM_HLI = col_character(),
  ..   P3YM_HLI_F = col_character(),
  ..   P3YM_HLI_M = col_character(),
  ..   P3HLINHE = col_character(),
  ..   P3HLINHE_F = col_character(),
  ..   P3HLINHE_M = col_character(),
  ..   P3HLI_HE = col_character(),
  ..   P3HLI_HE_F = col_character(),
  ..   P3HLI_HE_M = col_character(),
  ..   P5_HLI = col_character(),
  ..   P5_HLI_NHE = col_character(),
  ..   P5_HLI_HE = col_character(),
  ..   PHOG_IND = col_character(),
  ..   POB_AFRO = col_character(),
  ..   POB_AFRO_F = col_character(),
  ..   POB_AFRO_M = col_character(),
  ..   PCON_DISC = col_character(),
  ..   PCDISC_MOT = col_character(),
  ..   PCDISC_VIS = col_character(),
  ..   PCDISC_LENG = col_character(),
  ..   PCDISC_AUD = col_character(),
  ..   PCDISC_MOT2 = col_character(),
  ..   PCDISC_MEN = col_character(),
  ..   PCON_LIMI = col_character(),
  ..   PCLIM_CSB = col_character(),
  ..   PCLIM_VIS = col_character(),
  ..   PCLIM_HACO = col_character(),
  ..   PCLIM_OAUD = col_character(),
  ..   PCLIM_MOT2 = col_character(),
  ..   PCLIM_RE_CO = col_character(),
  ..   PCLIM_PMEN = col_character(),
  ..   PSIND_LIM = col_character(),
  ..   P3A5_NOA = col_character(),
  ..   P3A5_NOA_F = col_character(),
  ..   P3A5_NOA_M = col_character(),
  ..   P6A11_NOA = col_character(),
  ..   P6A11_NOAF = col_character(),
  ..   P6A11_NOAM = col_character(),
  ..   P12A14NOA = col_character(),
  ..   P12A14NOAF = col_character(),
  ..   P12A14NOAM = col_character(),
  ..   P15A17A = col_character(),
  ..   P15A17A_F = col_character(),
  ..   P15A17A_M = col_character(),
  ..   P18A24A = col_character(),
  ..   P18A24A_F = col_character(),
  ..   P18A24A_M = col_character(),
  ..   P8A14AN = col_character(),
  ..   P8A14AN_F = col_character(),
  ..   P8A14AN_M = col_character(),
  ..   P15YM_AN = col_character(),
  ..   P15YM_AN_F = col_character(),
  ..   P15YM_AN_M = col_character(),
  ..   P15YM_SE = col_character(),
  ..   P15YM_SE_F = col_character(),
  ..   P15YM_SE_M = col_character(),
  ..   P15PRI_IN = col_character(),
  ..   P15PRI_INF = col_character(),
  ..   P15PRI_INM = col_character(),
  ..   P15PRI_CO = col_character(),
  ..   P15PRI_COF = col_character(),
  ..   P15PRI_COM = col_character(),
  ..   P15SEC_IN = col_character(),
  ..   P15SEC_INF = col_character(),
  ..   P15SEC_INM = col_character(),
  ..   P15SEC_CO = col_character(),
  ..   P15SEC_COF = col_character(),
  ..   P15SEC_COM = col_character(),
  ..   P18YM_PB = col_character(),
  ..   P18YM_PB_F = col_character(),
  ..   P18YM_PB_M = col_character(),
  ..   GRAPROES = col_character(),
  ..   GRAPROES_F = col_character(),
  ..   GRAPROES_M = col_character(),
  ..   PEA = col_character(),
  ..   PEA_F = col_character(),
  ..   PEA_M = col_character(),
  ..   PE_INAC = col_character(),
  ..   PE_INAC_F = col_character(),
  ..   PE_INAC_M = col_character(),
  ..   POCUPADA = col_character(),
  ..   POCUPADA_F = col_character(),
  ..   POCUPADA_M = col_character(),
  ..   PDESOCUP = col_character(),
  ..   PDESOCUP_F = col_character(),
  ..   PDESOCUP_M = col_character(),
  ..   PSINDER = col_character(),
  ..   PDER_SS = col_character(),
  ..   PDER_IMSS = col_character(),
  ..   PDER_ISTE = col_character(),
  ..   PDER_ISTEE = col_character(),
  ..   PAFIL_PDOM = col_character(),
  ..   PDER_SEGP = col_character(),
  ..   PDER_IMSSB = col_character(),
  ..   PAFIL_IPRIV = col_character(),
  ..   PAFIL_OTRAI = col_character(),
  ..   P12YM_SOLT = col_character(),
  ..   P12YM_CASA = col_character(),
  ..   P12YM_SEPA = col_character(),
  ..   PCATOLICA = col_character(),
  ..   PRO_CRIEVA = col_character(),
  ..   POTRAS_REL = col_character(),
  ..   PSIN_RELIG = col_character(),
  ..   TOTHOG = col_character(),
  ..   HOGJEF_F = col_character(),
  ..   HOGJEF_M = col_character(),
  ..   POBHOG = col_character(),
  ..   PHOGJEF_F = col_character(),
  ..   PHOGJEF_M = col_character(),
  ..   VIVTOT = col_double(),
  ..   TVIVHAB = col_double(),
  ..   TVIVPAR = col_character(),
  ..   VIVPAR_HAB = col_character(),
  ..   VIVPARH_CV = col_character(),
  ..   TVIVPARHAB = col_character(),
  ..   VIVPAR_DES = col_character(),
  ..   VIVPAR_UT = col_character(),
  ..   OCUPVIVPAR = col_character(),
  ..   PROM_OCUP = col_character(),
  ..   PRO_OCUP_C = col_character(),
  ..   VPH_PISODT = col_character(),
  ..   VPH_PISOTI = col_character(),
  ..   VPH_1DOR = col_character(),
  ..   VPH_2YMASD = col_character(),
  ..   VPH_1CUART = col_character(),
  ..   VPH_2CUART = col_character(),
  ..   VPH_3YMASC = col_character(),
  ..   VPH_C_ELEC = col_character(),
  ..   VPH_S_ELEC = col_character(),
  ..   VPH_AGUADV = col_character(),
  ..   VPH_AEASP = col_character(),
  ..   VPH_AGUAFV = col_character(),
  ..   VPH_TINACO = col_character(),
  ..   VPH_CISTER = col_character(),
  ..   VPH_EXCSA = col_character(),
  ..   VPH_LETR = col_character(),
  ..   VPH_DRENAJ = col_character(),
  ..   VPH_NODREN = col_character(),
  ..   VPH_C_SERV = col_character(),
  ..   VPH_NDEAED = col_character(),
  ..   VPH_DSADMA = col_character(),
  ..   VPH_NDACMM = col_character(),
  ..   VPH_SNBIEN = col_character(),
  ..   VPH_REFRI = col_character(),
  ..   VPH_LAVAD = col_character(),
  ..   VPH_HMICRO = col_character(),
  ..   VPH_AUTOM = col_character(),
  ..   VPH_MOTO = col_character(),
  ..   VPH_BICI = col_character(),
  ..   VPH_RADIO = col_character(),
  ..   VPH_TV = col_character(),
  ..   VPH_PC = col_character(),
  ..   VPH_TELEF = col_character(),
  ..   VPH_CEL = col_character(),
  ..   VPH_INTER = col_character(),
  ..   VPH_STVP = col_character(),
  ..   VPH_SPMVPI = col_character(),
  ..   VPH_CVJ = col_character(),
  ..   VPH_SINRTV = col_character(),
  ..   VPH_SINLTC = col_character(),
  ..   VPH_SINCINT = col_character(),
  ..   VPH_SINTIC = col_character(),
  ..   TAMLOC = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 

Exporting as parquet

# Write the wrangled selection to disk as a parquet dataset,
# one partition directory per state (NOM_ENT).
table <- arrow::Table$create(selected_df)

output_dir <- here::here("data", "processed", "parquet_data")

arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = "NOM_ENT",
  existing_data_behavior = "overwrite"
)

Reading parquet

# Read the whole partitioned parquet dataset back into memory.
ds <- dplyr::collect(
  arrow::open_dataset(here::here("data", "processed", "parquet_data"))
)

ds

Puebla

# Load only the rows whose state is Puebla.
ds_puebla <- arrow::open_dataset(
  here::here("data", "processed", "parquet_data")
) |>
  dplyr::filter(NOM_ENT == "Puebla") |>
  dplyr::collect()

ds_puebla

Yucatán

# Load only the rows whose state is Yucatán.
ds_yucatan <- arrow::open_dataset(
  here::here("data", "processed", "parquet_data")
) |>
  dplyr::filter(NOM_ENT == "Yucatán") |>
  dplyr::collect()

ds_yucatan

Nuevo León

# Load only the rows whose state is Nuevo León.
ds_nuevo_leon <- arrow::open_dataset(
  here::here("data", "processed", "parquet_data")
) |>
  dplyr::filter(NOM_ENT == "Nuevo León") |>
  dplyr::collect()

ds_nuevo_leon

Total Nacional

# Load only the national-total rows.
ds_nacional <- arrow::open_dataset(
  here::here("data", "processed", "parquet_data")
) |>
  dplyr::filter(NOM_ENT == "Total nacional") |>
  dplyr::collect()

ds_nacional

Verify datasets are not empty


# For every entity name in `entities_csv`, read its rows back from the
# parquet dataset and report whether any rows were found.
for (value in entities_csv) {
  read_dfs <- arrow::open_dataset(
    here::here("data", "processed", "parquet_data")
  ) |>
    dplyr::filter(NOM_ENT == value) |>
    dplyr::collect()

  # Non-empty case first; an empty result is the exception being looked for.
  if (nrow(read_dfs) > 0) {
    print(paste("OK", value, nrow(read_dfs)))
  } else {
    print(paste("Dataset is empty", value))
  }
}
[1] "OK Total nacional 3"
[1] "OK Aguascalientes 2058"
[1] "OK Baja California 5566"
[1] "OK Baja California Sur 2561"
[1] "OK Campeche 2800"
[1] "OK Coahuila 4149"
[1] "OK Colima 1259"
[1] "OK Chiapas 21487"
[1] "OK Chihuahua 12389"
[1] "OK Ciudad de México 666"
[1] "OK Durango 6006"
[1] "OK Guanajuato 8945"
[1] "OK Guerrero 7001"
[1] "OK Hidalgo 4916"
[1] "OK Jalisco 10715"
[1] "OK Estado de México 5136"
[1] "OK Michoacán 8956"
[1] "OK Morelos 1678"
[1] "OK Nayarit 2913"
[1] "OK Nuevo León 4974"
[1] "OK Oaxaca 11856"
[1] "OK Puebla 7059"
[1] "OK Querétaro 2249"
[1] "OK Quintana Roo 2243"
[1] "OK San Luis Potosí 6729"
[1] "OK Sinaloa 5552"
[1] "OK Sonora 7500"
[1] "OK Tabasco 2517"
[1] "OK Tamaulipas 6695"
[1] "OK Tlaxcala 1323"
[1] "OK Veracruz 20401"
[1] "OK Yucatán 2691"
[1] "OK Zacatecas 4669"

Coordinate Lab

# Pull the raw coordinate strings and pick one longitude to experiment with.
longitudes <- selected_df[["LONGITUD"]]
latitudes <- selected_df[["LATITUD"]]
test_long <- longitudes[8]
test_long
[1] "102°17'45.768\" W"
# Split the sample string on the degree, minute, second and space delimiters;
# the first three pieces are degrees, minutes and seconds.
sections <- strsplit(test_long, "[°'\" ]")[[1]]
degrees <- as.numeric(sections[1])
minutes <- as.numeric(sections[2])
seconds <- as.numeric(sections[3])

# Negated to express the longitude as a western (negative) decimal value.
decimal_degrees <- -1 * (degrees + minutes / 60 + seconds / 3600)
decimal_degrees
[1] -102.296
#' Convert a degrees-minutes-seconds longitude string to decimal degrees.
#'
#' @param test_long A single string such as `"102°17'45.768\" W"`, or `NA`.
#'   Must be length one (the `NA` guard is a scalar `if`).
#' @return A length-one double. The value is negative (west) unless the
#'   string ends in an explicit `E`, in which case it is positive.
#'   `NA_real_` when the input is `NA`.
longitude_to_decimal <- function(test_long) {
  if (is.na(test_long)) {
    # Typed NA keeps the result numeric even when every input is missing.
    return(NA_real_)
  }

  # Pieces 1-3 of the split are degrees, minutes and seconds.
  sections <- unlist(strsplit(test_long, "[°'\" ]"))

  degrees <- as.numeric(sections[1])
  minutes <- as.numeric(sections[2])
  seconds <- as.numeric(sections[3])

  # Only an explicit trailing "E" yields a positive value; a "W" suffix or a
  # missing hemisphere letter is treated as west.
  hemisphere_sign <- if (grepl("E\\s*$", test_long)) 1 else -1

  # Bare final expression so the value is returned visibly.
  (degrees + minutes / 60 + seconds / 3600) * hemisphere_sign
}

#' Convert a degrees-minutes-seconds latitude string to decimal degrees.
#'
#' @param test_lat A single string such as `"21°52'47.362\" N"`, or `NA`.
#'   Must be length one (the `NA` guard is a scalar `if`).
#' @return A length-one double. The value is positive (north) unless the
#'   string ends in an explicit `S`, in which case it is negative.
#'   `NA_real_` when the input is `NA`.
latitude_to_decimal <- function(test_lat) {
  if (is.na(test_lat)) {
    # Typed NA keeps the result numeric even when every input is missing.
    return(NA_real_)
  }

  # Pieces 1-3 of the split are degrees, minutes and seconds.
  sections <- unlist(strsplit(test_lat, "[°'\" ]"))

  degrees <- as.numeric(sections[1])
  minutes <- as.numeric(sections[2])
  seconds <- as.numeric(sections[3])

  # Only an explicit trailing "S" yields a negative value; an "N" suffix or a
  # missing hemisphere letter is treated as north.
  hemisphere_sign <- if (grepl("S\\s*$", test_lat)) -1 else 1

  # Bare final expression so the value is returned visibly.
  (degrees + minutes / 60 + seconds / 3600) * hemisphere_sign
}
# Add decimal-degree coordinate columns derived from the DMS strings.
# vapply() pins each new column to an unnamed double vector: sapply() would
# return a list for zero rows, a logical vector when every value is NA, and
# would attach the raw coordinate strings as element names.
selected_clean <- selected_df |>
  mutate(
    longitude_decimal = vapply(
      LONGITUD,
      # as.numeric() turns a logical NA result into NA_real_ for vapply().
      function(x) as.numeric(longitude_to_decimal(x)),
      numeric(1),
      USE.NAMES = FALSE
    ),
    latitude_decimal = vapply(
      LATITUD,
      function(x) as.numeric(latitude_to_decimal(x)),
      numeric(1),
      USE.NAMES = FALSE
    )
  )

selected_clean

Exporting clean

# Write the table that includes the decimal coordinates as a second parquet
# dataset, again partitioned by state (NOM_ENT).
table <- arrow::Table$create(selected_clean)

output_dir <- here::here("data", "processed", "parquet_data_coords")

arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = "NOM_ENT",
  existing_data_behavior = "overwrite"
)
# Read the CSV copy of the coordinate data back in as a sanity check.
prueba_csv <- readr::read_csv(
  here::here("data", "processed", "data_coords.csv")
)
Rows: 195662 Columns: 73── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (70): NOM_ENT, NOM_MUN, NOM_LOC, LONGITUD, LATITUD, REL_H_M, POB0_14, POB15_64, POB65_MAS, P_0A4, P_0A4_F, P_0A4_M, P_5A9, P_5A9_F, P_5A9_M, P_10A...
dbl  (3): POBTOT, longitude_decimal, latitude_decimal
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the table that was just read back from data_coords.csv.
prueba_csv

Verify unique cities per State

# For every entity, compare the row count with the number of distinct
# locality names (NOM_LOC) to see whether the name alone identifies a row.
for (value in entities_csv) {
  read_dfs <- arrow::open_dataset(
    here::here("data", "processed", "parquet_data")
  ) |>
    dplyr::filter(NOM_ENT == value) |>
    dplyr::collect()

  # Mismatch branch first: it prints both counts for comparison.
  if (nrow(read_dfs) != length(unique(read_dfs[["NOM_LOC"]]))) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>",
      length(unique(read_dfs[["NOM_LOC"]]))
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1772"
[1] "NOT OK Baja California 5566 <> 4621"
[1] "NOT OK Baja California Sur 2561 <> 1832"
[1] "NOT OK Campeche 2800 <> 1894"
[1] "NOT OK Coahuila 4149 <> 3287"
[1] "NOT OK Colima 1259 <> 1035"
[1] "NOT OK Chiapas 21487 <> 10349"
[1] "NOT OK Chihuahua 12389 <> 8082"
[1] "NOT OK Ciudad de México 666 <> 617"
[1] "NOT OK Durango 6006 <> 4444"
[1] "NOT OK Guanajuato 8945 <> 6923"
[1] "NOT OK Guerrero 7001 <> 5189"
[1] "NOT OK Hidalgo 4916 <> 3690"
[1] "NOT OK Jalisco 10715 <> 6764"
[1] "NOT OK Estado de México 5136 <> 4291"
[1] "NOT OK Michoacán 8956 <> 6065"
[1] "NOT OK Morelos 1678 <> 1471"
[1] "NOT OK Nayarit 2913 <> 2243"
[1] "NOT OK Nuevo León 4974 <> 3328"
[1] "NOT OK Oaxaca 11856 <> 7924"
[1] "NOT OK Puebla 7059 <> 5037"
[1] "NOT OK Querétaro 2249 <> 1885"
[1] "NOT OK Quintana Roo 2243 <> 1832"
[1] "NOT OK San Luis Potosí 6729 <> 5037"
[1] "NOT OK Sinaloa 5552 <> 4064"
[1] "NOT OK Sonora 7500 <> 5710"
[1] "NOT OK Tabasco 2517 <> 2019"
[1] "NOT OK Tamaulipas 6695 <> 4601"
[1] "NOT OK Tlaxcala 1323 <> 1075"
[1] "NOT OK Veracruz 20401 <> 12141"
[1] "NOT OK Yucatán 2691 <> 1790"
[1] "NOT OK Zacatecas 4669 <> 3594"
# For every entity, test whether municipality name + locality name
# (NOM_MUN + NOM_LOC) is enough to identify each row uniquely.
for (value in entities_csv) {
  read_dfs <- arrow::open_dataset(
    here::here("data", "processed", "parquet_data")
  ) |>
    dplyr::filter(NOM_ENT == value) |>
    dplyr::collect() |>
    # Candidate composite key, appended as the last column.
    dplyr::mutate(NOM_MUN_LOC = paste(NOM_MUN, NOM_LOC, sep = "_"))

  if (nrow(read_dfs) != length(unique(read_dfs[["NOM_MUN_LOC"]]))) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>",
      length(unique(read_dfs[["NOM_MUN_LOC"]]))
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1979"
[1] "NOT OK Baja California 5566 <> 5074"
[1] "NOT OK Baja California Sur 2561 <> 2212"
[1] "NOT OK Campeche 2800 <> 2368"
[1] "NOT OK Coahuila 4149 <> 4023"
[1] "NOT OK Colima 1259 <> 1177"
[1] "NOT OK Chiapas 21487 <> 18268"
[1] "NOT OK Chihuahua 12389 <> 11167"
[1] "NOT OK Ciudad de México 666 <> 663"
[1] "NOT OK Durango 6006 <> 5578"
[1] "NOT OK Guanajuato 8945 <> 8753"
[1] "NOT OK Guerrero 7001 <> 6870"
[1] "NOT OK Hidalgo 4916 <> 4870"
[1] "NOT OK Jalisco 10715 <> 10393"
[1] "NOT OK Estado de México 5136 <> 5108"
[1] "NOT OK Michoacán 8956 <> 8656"
[1] "NOT OK Morelos 1678 <> 1662"
[1] "NOT OK Nayarit 2913 <> 2726"
[1] "NOT OK Nuevo León 4974 <> 4641"
[1] "NOT OK Oaxaca 11856 <> 11760"
[1] "NOT OK Puebla 7059 <> 6837"
[1] "NOT OK Querétaro 2249 <> 2222"
[1] "NOT OK Quintana Roo 2243 <> 2138"
[1] "NOT OK San Luis Potosí 6729 <> 6590"
[1] "NOT OK Sinaloa 5552 <> 5141"
[1] "NOT OK Sonora 7500 <> 7141"
[1] "NOT OK Tabasco 2517 <> 2399"
[1] "NOT OK Tamaulipas 6695 <> 6265"
[1] "NOT OK Tlaxcala 1323 <> 1315"
[1] "NOT OK Veracruz 20401 <> 19225"
[1] "NOT OK Yucatán 2691 <> 2558"
[1] "NOT OK Zacatecas 4669 <> 4610"
# For every entity, test whether locality code + locality name
# (LOC + NOM_LOC) identifies each row uniquely.
for (value in entities_csv) {
  read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
    filter(NOM_ENT == value) |>
    collect()

  # If LOC is missing, `read_dfs$LOC` is NULL and paste() treats it as "",
  # so the key would collapse to "_<NOM_LOC>" and the check would silently
  # repeat the NOM_LOC-only check. Stop instead of printing that result.
  if (!"LOC" %in% names(read_dfs)) {
    warning(
      "Column `LOC` is not in the parquet dataset; ",
      "the LOC + NOM_LOC uniqueness check was skipped.",
      call. = FALSE
    )
    break
  }

  read_dfs$NOM_LOC_LOC <- paste(read_dfs$LOC, read_dfs$NOM_LOC, sep = "_")

  if (nrow(read_dfs) == length(unique(read_dfs$NOM_LOC_LOC))) {
    print(paste("Localities Unique", value))
  } else {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>",
      length(unique(read_dfs$NOM_LOC_LOC))
    ))
  }
}
Warning: Unknown or uninitialised column: `LOC`.
[1] "Localities Unique Total nacional"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Aguascalientes 2058 <> 1772"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California 5566 <> 4621"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California Sur 2561 <> 1832"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Campeche 2800 <> 1894"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Coahuila 4149 <> 3287"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Colima 1259 <> 1035"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chiapas 21487 <> 10349"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chihuahua 12389 <> 8082"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Ciudad de México 666 <> 617"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Durango 6006 <> 4444"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guanajuato 8945 <> 6923"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guerrero 7001 <> 5189"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Hidalgo 4916 <> 3690"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Jalisco 10715 <> 6764"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Estado de México 5136 <> 4291"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Michoacán 8956 <> 6065"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Morelos 1678 <> 1471"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nayarit 2913 <> 2243"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nuevo León 4974 <> 3328"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Oaxaca 11856 <> 7924"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Puebla 7059 <> 5037"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Querétaro 2249 <> 1885"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Quintana Roo 2243 <> 1832"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK San Luis Potosí 6729 <> 5037"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sinaloa 5552 <> 4064"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sonora 7500 <> 5710"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tabasco 2517 <> 2019"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tamaulipas 6695 <> 4601"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tlaxcala 1323 <> 1075"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Veracruz 20401 <> 12141"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Yucatán 2691 <> 1790"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Zacatecas 4669 <> 3594"

# Disabled chunk, kept for reference (LOC + MUN uniqueness check):
# for(value in entities_csv) {
#
#   read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
#     filter(NOM_ENT==value) |>
#     collect()
#
#   read_dfs$LOC_MUN <- paste(read_dfs$LOC, read_dfs$MUN, sep = "_")
#
#   if (nrow(read_dfs) == length(unique(read_dfs$LOC_MUN))) {
#     print(paste("Localities Unique", value))
#   } else {
#     print(paste("NOT OK", value, nrow(read_dfs), "<>", length(unique(read_dfs$LOC_MUN))))
#
#   }
#
# }

# For every entity, test whether locality code + municipality name
# (LOC + NOM_MUN) identifies each row uniquely.
for (value in entities_csv) {
  read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
    filter(NOM_ENT == value) |>
    collect()

  # If LOC is missing, `read_dfs$LOC` is NULL and paste() treats it as "",
  # so the key would collapse to "_<NOM_MUN>" and the check would only count
  # distinct municipality names. Stop instead of printing that result.
  if (!"LOC" %in% names(read_dfs)) {
    warning(
      "Column `LOC` is not in the parquet dataset; ",
      "the LOC + NOM_MUN uniqueness check was skipped.",
      call. = FALSE
    )
    break
  }

  read_dfs$LOC_MUN <- paste(read_dfs$LOC, read_dfs$NOM_MUN, sep = "_")

  if (nrow(read_dfs) == length(unique(read_dfs$LOC_MUN))) {
    print(paste("Localities Unique", value))
  } else {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>",
      length(unique(read_dfs$LOC_MUN))
    ))
  }
}
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Total nacional 3 <> 1"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Aguascalientes 2058 <> 12"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California 5566 <> 7"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California Sur 2561 <> 6"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Campeche 2800 <> 13"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Coahuila 4149 <> 39"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Colima 1259 <> 11"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chiapas 21487 <> 125"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chihuahua 12389 <> 68"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Ciudad de México 666 <> 17"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Durango 6006 <> 40"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guanajuato 8945 <> 47"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guerrero 7001 <> 82"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Hidalgo 4916 <> 85"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Jalisco 10715 <> 126"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Estado de México 5136 <> 126"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Michoacán 8956 <> 114"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Morelos 1678 <> 37"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nayarit 2913 <> 21"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nuevo León 4974 <> 52"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Oaxaca 11856 <> 569"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Puebla 7059 <> 218"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Querétaro 2249 <> 19"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Quintana Roo 2243 <> 12"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK San Luis Potosí 6729 <> 59"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sinaloa 5552 <> 19"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sonora 7500 <> 73"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tabasco 2517 <> 18"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tamaulipas 6695 <> 44"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tlaxcala 1323 <> 61"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Veracruz 20401 <> 213"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Yucatán 2691 <> 107"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Zacatecas 4669 <> 59"

Conclusion

Append code to MUN and LOC

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogTWFyY28gUG9sbyBCcmF2byBNb250aWVsDQpkYXRlOiAyMDIwLTA0LTIxDQotLS0NCg0KIyMjIExpYnJhcmllcw0KDQpgYGB7cn0NCiMgaWYgKCFyZXF1aXJlKCJyZW52IikpIGluc3RhbGwucGFja2FnZXMoInJlbnYiKQ0KIyBsaWJyYXJ5KHJlbnYpDQojIHJlbnY6OnJlc3RvcmUoKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KGFycm93KQ0KYGBgDQoNCiMjIyBSZWFkIGRhdGENCg0KYGBge3J9DQp6aXBfZmlsZSA8LSBoZXJlKCJkYXRhIiwgInJhdyIsICJpdGVyXzAwX2NwdjIwMjBfY3N2LnppcCIpDQpgYGANCg0KYGBge3J9DQp0ZW1wX2RpciA8LSBoZXJlKCJ0ZW1wIikNCmRpci5jcmVhdGUodGVtcF9kaXIsIHNob3dXYXJuaW5ncyA9IEZBTFNFKQ0KDQp1bnppcCh6aXBfZmlsZSwgZmlsZXMgPSBjKCJpdGVyXzAwX2NwdjIwMjAvY29uanVudG9fZGVfZGF0b3MvY29uanVudG9fZGVfZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIsICJpdGVyXzAwX2NwdjIwMjAvZGljY2lvbmFyaW9fZGF0b3MvZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpLCBleGRpciA9IHRlbXBfZGlyKQ0KYGBgDQoNCmBgYHtyfQ0KDQpkYXRhX3BhdGggPC0gaGVyZSh0ZW1wX2RpciwNCiAgICAgICAgICAgICAgICAgIml0ZXJfMDBfY3B2MjAyMCIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvcyIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvc19pdGVyXzAwQ1NWMjAuY3N2IikNCg0KZGljdF9wYXRoIDwtIGhlcmUodGVtcF9kaXIsDQogICAgICAgICAgICAgICAgICJpdGVyXzAwX2NwdjIwMjAiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3MiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpDQoNCmluZm9fZGljdCA8LSByZWFkX2NzdihkaWN0X3BhdGgpDQpkZiA8LSByZWFkX2NzdihkYXRhX3BhdGgpDQoNCg0KdW5saW5rKHRlbXBfZGlyLCByZWN1cnNpdmUgPSBUUlVFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBFeHBvcnRpbmcgZGljdGlvbmFyeSBmaWxlDQp3cml0ZV9jc3YoaW5mb19kaWN0LA0KICAgICAgICAgIGhlcmUoImRhdGEiLCAicmF3IiwgImRpY2Npb25hcmlvX2RhdG9zX2l0ZXJfMDBDU1YyMC5jc3YiKSkNCg0KYGBgDQoNCiMjIyBFeHBsb3JhdGlvbg0KDQpgYGB7cn0NCmhlYWQoZGYpDQpoZWFkKGluZm9fZGljdCkNCmBgYA0KDQpgYGB7cn0NCnN0cihkZikNCmBgYA0KDQpgYGB7cn0NCmNsZWFuX2luZm9fZGljdCA8LSBpbmZvX2RpY3RbLWMoMTozKSwgXQ0KbmFtZXMoY2xlYW5faW5mb19kaWN0KSA8LSBjbGVhbl9pbmZvX2RpY3RbMSwgXQ0KY2xlYW5faW5mb19kaWN0IDwtIGNsZWFuX2luZm9fZGljdFstMSxdDQpjbGVhbl9pbmZvX2RpY3QgPC0gY2xlYW5faW5mb19kaWN0Wywg
LWMoNzoxMCldDQoNCg0KY2xlYW5faW5mb19kaWN0DQpgYGANCg0KIyMjIFJlcGxhY2luZyBuYW1lcyBmb3IgY29uc2lzdGVuY3kNCg0KYGBge3J9DQpyZXBsYWNlbWVudF9kaWN0IDwtIGMoDQogICJDb2FodWlsYSBkZSBaYXJhZ296YSIgPSAiQ29haHVpbGEiLA0KICAiTWljaG9hY8OhbiBkZSBPY2FtcG8iID0gIk1pY2hvYWPDoW4iLA0KICAiVmVyYWNydXogZGUgSWduYWNpbyBkZSBsYSBMbGF2ZSIgPSAiVmVyYWNydXoiLA0KICAiTcOpeGljbyIgPSAiRXN0YWRvIGRlIE3DqXhpY28iDQoNCikNCg0KZGZfbmV3X25hbWVzIDwtIGRmIHw+IA0KICAgICAgICBtdXRhdGUoTk9NX0VOVCA9IGlmZWxzZShOT01fRU5UICVpbiUgbmFtZXMocmVwbGFjZW1lbnRfZGljdCksDQogICAgICAgICAgICAgICAgICAgICAgICAgIHJlcGxhY2VtZW50X2RpY3RbTk9NX0VOVF0sDQogICAgICAgICAgICAgICAgICAgICAgICAgIE5PTV9FTlQpKQ0KYGBgDQoNCmBgYHtyfQ0KdW5pcXVlX3N0YXRlcyA8LSBkZl9uZXdfbmFtZXMgfD4gDQogICAgICBkaXN0aW5jdChOT01fRU5UKQ0KDQp3cml0ZV9jc3YodW5pcXVlX3N0YXRlcywgaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAiZW50aXR5X25hbWVzLmNzdiIpKQ0KDQoNCnVuaXF1ZV9zdGF0ZXMNCmBgYA0KDQpgYGB7cn0NCmVudGl0aWVzX2NzdiA8LSByZWFkX2NzdihoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJlbnRpdHlfbmFtZXMuY3N2IikpIHw+IHB1bGwoKQ0KDQpwcmludChlbnRpdGllc19jc3YpDQpgYGANCg0KIyMjIFNlbGVjdGluZyByb3dzIHRoYXQgd2UnbGwgYW5hbHl6ZQ0KDQpgYGB7cn0NCnJvd3NfdG9faW5jbHVkZSA8LSBjKDIsIDQsIDY6OCwgMTAsIDUzOjExNykNCg0KZmlsdGVyZWRfZGF0YSA8LSBjbGVhbl9pbmZvX2RpY3QgfD4gDQogICAgICBmaWx0ZXIocm93X251bWJlcigpICVpbiUgcm93c190b19pbmNsdWRlKSB8PiANCiAgICAgIHB1bGwoNCkNCg0KZmlsdGVyZWRfZGF0YQ0KYGBgDQoNCmBgYHtyfQ0Kc2VsZWN0ZWRfZGYgPC0gZGZfbmV3X25hbWVzIHw+IA0KICAgICAgc2VsZWN0KGZpbHRlcmVkX2RhdGEpDQoNCnNlbGVjdGVkX2RmDQpgYGANCg0KIyMjIEVEQSBiZWZvcmUgZXhwb3J0aW5nDQoNCmBgYHtyfQ0Kc3RyKGRmKQ0KYGBgDQoNCiMjIyBFeHBvcnRpbmcgYXMgcGFycXVldA0KDQpgYGB7cn0NCiMgRXhwb3J0IHdyYW5nbGVkIGRhdGEgYXMgcGFycXVldCBmaWxlDQp0YWJsZSA8LSBhcnJvdzo6VGFibGUkY3JlYXRlKHNlbGVjdGVkX2RmKQ0KDQpvdXRwdXRfZGlyIDwtIGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpDQoNCmFycm93Ojp3cml0ZV9kYXRhc2V0KHRhYmxlLCBvdXRwdXRfZGlyLCBwYXJ0aXRpb25pbmcgPSBjKCJOT01fRU5UIiksIGV4aXN0aW5nX2RhdGFfYmVoYXZpb3IgPSAib3ZlcndyaXRlIikNCmBgYA0KDQojIyMgUmVhZGluZyBwYXJxdWV0DQoNCmBgYHtyfQ0KZHMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJv
Y2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8PiANCiAgICAgICAgY29sbGVjdCgpDQoNCmRzDQpgYGANCg0KIyMjIFB1ZWJsYQ0KDQpgYGB7cn0NCmRzX3B1ZWJsYSA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PSJQdWVibGEiKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfcHVlYmxhDQpgYGANCg0KIyMjIFl1Y2F0w6FuDQoNCmBgYHtyfQ0KZHNfeXVjYXRhbiA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PSJZdWNhdMOhbiIpIHw+IA0KICAgIGNvbGxlY3QoKQ0KDQpkc195dWNhdGFuDQpgYGANCg0KIyMjIE51ZXZvIExlw7NuDQoNCmBgYHtyfQ0KZHNfbnVldm9fbGVvbiA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PSJOdWV2byBMZcOzbiIpIHw+IA0KICAgIGNvbGxlY3QoKQ0KDQpkc19udWV2b19sZW9uDQpgYGANCg0KIyMjIFRvdGFsIE5hY2lvbmFsDQoNCmBgYHtyfQ0KZHNfbmFjaW9uYWwgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iVG90YWwgbmFjaW9uYWwiKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfbmFjaW9uYWwNCmBgYA0KDQojIyMgVmVyaWZ5IGRhdGFzZXRzIGFyZSBub3QgZW1wdHkNCg0KYGBge3J9DQoNCmZvcih2YWx1ZSBpbiBlbnRpdGllc19jc3YpIHsNCiAgDQogIHJlYWRfZGZzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4NCiAgICBmaWx0ZXIoTk9NX0VOVD09dmFsdWUpIHw+DQogICAgY29sbGVjdCgpDQoNCiAgaWYgKG5yb3cocmVhZF9kZnMpID09IDApIHsNCiAgICBwcmludChwYXN0ZSgiRGF0YXNldCBpcyBlbXB0eSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk9LIiwgdmFsdWUsIG5yb3cocmVhZF9kZnMpKSkNCg0KICB9DQoNCn0NCmBgYA0KDQojIyMgQ29vcmRpbmF0ZSBMYWINCg0KYGBge3J9DQpsb25naXR1ZGVzIDwtIHNlbGVjdGVkX2RmJExPTkdJVFVEDQpsYXRpdHVkZXMgPC0gc2VsZWN0ZWRfZGYkTEFUSVRVRA0KYGBgDQoNCmBgYHtyfQ0KdGVzdF9sb25nIDwtIGxvbmdpdHVkZXNbOF0NCnRlc3RfbG9uZw0KYGBgDQoNCmBgYHtyfQ0Kc2VjdGlvbnMgPC0gdW5saXN0KHN0cnNwbGl0KHRlc3RfbG9uZywgIlvCsCdcIiBdIikpDQpkZWdyZWVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMV0pDQptaW51dGVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMl0pDQpzZWNvbmRzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbM10pDQoNCmRlY2ltYWxfZGVncmVlcyA8LSAoZGVncmVlcyArIG1pbnV0ZXMgLyA2
MCArIHNlY29uZHMgLyAzNjAwKSAqIC0xDQpkZWNpbWFsX2RlZ3JlZXMNCmBgYA0KDQpgYGB7cn0NCmxvbmdpdHVkZV90b19kZWNpbWFsIDwtIGZ1bmN0aW9uKHRlc3RfbG9uZykgew0KICAgIGlmIChpcy5uYSh0ZXN0X2xvbmcpKSB7DQogICAgcmV0dXJuKE5BKSAgDQogICAgfQ0KICANCiAgc2VjdGlvbnMgPC0gdW5saXN0KHN0cnNwbGl0KHRlc3RfbG9uZywgIlvCsCdcIiBdIikpDQogIA0KICBkZWdyZWVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMV0pDQogIG1pbnV0ZXMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1syXSkNCiAgc2Vjb25kcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzNdKQ0KDQogIA0KICBkZWNpbWFsX2RlZ3JlZXMgPC0gKGRlZ3JlZXMgKyBtaW51dGVzIC8gNjAgKyBzZWNvbmRzIC8gMzYwMCkgKiAtMQ0KfQ0KDQpsYXRpdHVkZV90b19kZWNpbWFsIDwtIGZ1bmN0aW9uKHRlc3RfbGF0KSB7DQogICAgaWYgKGlzLm5hKHRlc3RfbGF0KSkgew0KICAgIHJldHVybihOQSkgIA0KICAgIH0NCiAgDQogIHNlY3Rpb25zIDwtIHVubGlzdChzdHJzcGxpdCh0ZXN0X2xhdCwgIlvCsCdcIiBdIikpDQogIA0KICBkZWdyZWVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMV0pDQogIG1pbnV0ZXMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1syXSkNCiAgc2Vjb25kcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzNdKQ0KDQogIA0KICBkZWNpbWFsX2RlZ3JlZXMgPC0gKGRlZ3JlZXMgKyBtaW51dGVzIC8gNjAgKyBzZWNvbmRzIC8gMzYwMCkNCn0NCmBgYA0KDQpgYGB7cn0NCnNlbGVjdGVkX2NsZWFuIDwtIHNlbGVjdGVkX2RmIHw+IA0KICAgICAgbXV0YXRlKGxvbmdpdHVkZV9kZWNpbWFsID0gc2FwcGx5KExPTkdJVFVELCBsb25naXR1ZGVfdG9fZGVjaW1hbCksDQogICAgICAgICAgICAgbGF0aXR1ZGVfZGVjaW1hbCA9IHNhcHBseShMQVRJVFVELCBsYXRpdHVkZV90b19kZWNpbWFsKSkNCg0Kc2VsZWN0ZWRfY2xlYW4NCmBgYA0KDQojIyMgRXhwb3J0aW5nIGNsZWFuDQoNCmBgYHtyfQ0KdGFibGUgPC0gYXJyb3c6OlRhYmxlJGNyZWF0ZShzZWxlY3RlZF9jbGVhbikNCg0Kb3V0cHV0X2RpciA8LSBoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGFfY29vcmRzIikNCg0KYXJyb3c6OndyaXRlX2RhdGFzZXQodGFibGUsIG91dHB1dF9kaXIsIHBhcnRpdGlvbmluZyA9IGMoIk5PTV9FTlQiKSwgZXhpc3RpbmdfZGF0YV9iZWhhdmlvciA9ICJvdmVyd3JpdGUiKQ0KDQp3cml0ZV9jc3Yoc2VsZWN0ZWRfY2xlYW4sDQogICAgICAgICAgaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAiZGF0YV9jb29yZHMuY3N2IikpDQpgYGANCg0KYGBge3J9DQpwcnVlYmFfY3N2IDwtIHJlYWRfY3N2KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgImRhdGFfY29vcmRzLmNzdiIpKQ0KcHJ1ZWJhX2Nzdg0KYGBgDQoNCiMjIyBWZXJpZnkgdW5pcXVlIGNpdGllcyBwZXIgU3RhdGUNCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7
DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0MpKSkgew0KICAgIHByaW50KHBhc3RlKCJMb2NhbGl0aWVzIFVuaXF1ZSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk5PVCBPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSwgIjw+IiwgbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmBgYHtyfQ0KZm9yKHZhbHVlIGluIGVudGl0aWVzX2Nzdikgew0KICANCiAgcmVhZF9kZnMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT12YWx1ZSkgfD4NCiAgICBjb2xsZWN0KCkNCiAgDQogICAgcmVhZF9kZnMkTk9NX01VTl9MT0MgPC0gcGFzdGUocmVhZF9kZnMkTk9NX01VTiwgcmVhZF9kZnMkTk9NX0xPQywgc2VwID0gIl8iKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9NVU5fTE9DKSkpIHsNCiAgICBwcmludChwYXN0ZSgiTG9jYWxpdGllcyBVbmlxdWUiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJOT1QgT0siLCB2YWx1ZSwgbnJvdyhyZWFkX2RmcyksICI8PiIsIGxlbmd0aCh1bmlxdWUocmVhZF9kZnMkTk9NX01VTl9MT0MpKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KICANCiAgICByZWFkX2RmcyROT01fTE9DX0xPQyA8LSBwYXN0ZShyZWFkX2RmcyRMT0MsIHJlYWRfZGZzJE5PTV9MT0MsIHNlcCA9ICJfIikNCg0KICBpZiAobnJvdyhyZWFkX2RmcykgPT0gbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DX0xPQykpKSB7DQogICAgcHJpbnQocGFzdGUoIkxvY2FsaXRpZXMgVW5pcXVlIiwgdmFsdWUpKQ0KICB9IGVsc2Ugew0KICAgICAgICBwcmludChwYXN0ZSgiTk9UIE9LIiwgdmFsdWUsIG5yb3cocmVhZF9kZnMpLCAiPD4iLCBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0NfTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmB7IyB7cn0gIyBmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7ICMgICAgIyAgIHJlYWRfZGZzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4gIyAgICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8PiAjICAgICBjb2xs
ZWN0KCkgIyAgICAjICAgICByZWFkX2RmcyRMT0NfTVVOIDwtIHBhc3RlKHJlYWRfZGZzJExPQywgcmVhZF9kZnMkTVVOLCBzZXAgPSAiXyIpICMgICMgICBpZiAobnJvdyhyZWFkX2RmcykgPT0gbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyRMT0NfTVVOKSkpIHsgIyAgICAgcHJpbnQocGFzdGUoIkxvY2FsaXRpZXMgVW5pcXVlIiwgdmFsdWUpKSAjICAgfSBlbHNlIHsgIyAgICAgICAgIHByaW50KHBhc3RlKCJOT1QgT0siLCB2YWx1ZSwgbnJvdyhyZWFkX2RmcyksICI8PiIsIGxlbmd0aCh1bmlxdWUocmVhZF9kZnMkTE9DX01VTikpKSkgIyAgIyAgIH0gIyAgIyB9YA0KDQpgYGB7cn0NCmZvcih2YWx1ZSBpbiBlbnRpdGllc19jc3YpIHsNCiAgDQogIHJlYWRfZGZzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4NCiAgICBmaWx0ZXIoTk9NX0VOVD09dmFsdWUpIHw+DQogICAgY29sbGVjdCgpDQogIA0KICAgIHJlYWRfZGZzJExPQ19NVU4gPC0gcGFzdGUocmVhZF9kZnMkTE9DLCByZWFkX2RmcyROT01fTVVOLCBzZXAgPSAiXyIpDQoNCiAgaWYgKG5yb3cocmVhZF9kZnMpID09IGxlbmd0aCh1bmlxdWUocmVhZF9kZnMkTE9DX01VTikpKSB7DQogICAgcHJpbnQocGFzdGUoIkxvY2FsaXRpZXMgVW5pcXVlIiwgdmFsdWUpKQ0KICB9IGVsc2Ugew0KICAgICAgICBwcmludChwYXN0ZSgiTk9UIE9LIiwgdmFsdWUsIG5yb3cocmVhZF9kZnMpLCAiPD4iLCBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJExPQ19NVU4pKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KIyMjIENvbmNsdXNpb24NCg0KIyMjIyBBcHBlbmQgY29kZSB0byBNVU4gYW5kIExPQw0K